import pandas as pd
import pymysql

df = pd.read_csv('dataset.csv')
print(df.head())

# 创建数据库连接
connection = pymysql.connect(host='localhost',
                             user='root',
                             password='123456',
                             database='hqyj',
                             port=3306,
                             charset='utf8')

cursor = connection.cursor()

cursor.execute(
    '''create table if not exists dataset(
        id int primary key auto_increment, 
        name varchar(255), 
        age int, 
        nationality varchar(255), 
        education varchar(20),
        gender varchar(20));'''
)

#  按行循环df读取数据，插入数据库
for index, row in df.iterrows():
    # print(row)
    # print(row.values)

    params = (row['name'], row['age'], row['nationality'], row['education'], row['gender'])
    # params = list(row.values[1:])
    print(params)
    sql = "insert into dataset(name, age, nationality, education, gender) values(%s, %s, %s, %s, %s);"
    cursor.execute(sql, params)

connection.commit()

print("年龄 >= 20 的所有数据：")
sql = "select * from dataset where age >= 20;"
cursor.execute(sql)
result = cursor.fetchall()
print(result)

print("列举出所有的国家及其数量:")
sql = "select nationality, count(*) as count from dataset group by nationality;"
cursor.execute(sql)
result = cursor.fetchall()
print(result)

print("年龄 >= 22,且显示的所有男性的年龄大于女性的平均年龄:")
sql = '''select * from dataset where age >= 22 and gender = '男' having age > (select avg(age) 
         from dataset where gender = '女');'''
cursor.execute(sql)
result = cursor.fetchall()
print(result)

print("只显示教育长度数量最多,年龄 >= 22 的数据:")
sql = '''SELECT * FROM dataset WHERE age >= 22 AND education = 
         ( SELECT education FROM dataset GROUP BY education ORDER BY count(*) DESC LIMIT 1 );'''
cursor.execute(sql)
result = cursor.fetchall()
print(result)



cursor.close()
connection.close()
